here::set_here()
## File .here already exists in /Users/koenvandenberge/Dropbox/research/brainStat/hbcRegenGithub/scripts
suppressPackageStartupMessages({
  library(slingshot)
  library(SingleCellExperiment)
  library(cowplot)
  library(rgl)
  library(clusterExperiment)
  library(RColorBrewer)
  library(ggplot2)
  library(pheatmap)
  library(wesanderson)
  library(gridExtra)
  library(irlba)
  library(tidyverse)
})

Import data

sds <- readRDS("../data/finalTrajectory/sling.rds")
counts <- readRDS("../data/finalTrajectory/counts_noResp_noMV.rds")
# counts <- round(counts)
sce <- readRDS("../data/finalTrajectory/sce_tradeSeq20200904.rds")
load("../data/ALL_TF.Rda")
clDatta <- readRDS("../data/finalTrajectory/dattaCl_noResp_noMV.rds")
timePoint <- readRDS("../data/finalTrajectory/timePoint_noResp_noMV.rds")

Select activated and renewed HBC clusters

I will select

  • activated HBCs as cells sampled in the 24h timepoint that are in the starting cluster
  • renewed (‘resting’) HBCs as cells sampled in the 14D timepoint that are in the HBC cluster
cols <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999")
view3d( theta = 10, phi = 10)
# Datta labels
plot3d(reducedDim(sds), aspect = 'iso', col=cols[clDatta], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")

# timepoints
plot3d(reducedDim(sds), aspect = 'iso', col=cols[timePoint], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")

# cluster labels used for TI
clSds <- apply(slingClusterLabels(sds), 1, which.max)
plot3d(reducedDim(sds), aspect = 'iso', col=cols[clSds], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")


actHBCId <- which(clSds == 8 & timePoint == "24HPI")
restHBCId <- which(clDatta == "HBC" & timePoint == "14DPI")
grpHBC <- rep(NA, length(c(actHBCId, restHBCId)))
grpHBC[1:length(actHBCId)] <- "activated"
grpHBC[is.na(grpHBC)] <- "resting"
grpHBC <- factor(grpHBC)
countHBC <- counts[, c(actHBCId, restHBCId)]

Dimensionality reduction

library(scran)
library(uwot)
library(scater)
## 
## Attaching package: 'scater'
## The following object is masked from 'package:clusterExperiment':
## 
##     plotHeatmap
logFracs <- log(sweep(countHBC+1e-5, 2, colSums(countHBC), "/"))
gv <- modelGeneVar(countHBC)
hvg <- getTopHVGs(gv, n=1000)
pc10 <- irlba::irlba(logFracs[hvg,], nv=10)
um10 <- uwot::umap(pc10$v %*% diag(pc10$d))

plot(um10, col=alpha(c("orange", "darkseagreen3")[as.numeric(grpHBC)], .75), pch=16, cex=1/2, bty='l')
legend("topright", c("activated", "resting"), pch=16, col=c("orange", "darkseagreen3"), bty="n")

Batch correction with Harmony

library(harmony)
## Loading required package: Rcpp
harmony_embeddings <- HarmonyMatrix(logFracs, meta_data=grpHBC, do_pca=TRUE)
## Harmony 1/10
## Harmony 2/10
## Harmony converged after 2 iterations
umHarmony <- uwot::umap(harmony_embeddings)

plot(umHarmony, col=alpha(c("orange", "darkseagreen3")[as.numeric(grpHBC)], .75), pch=16, cex=1/2, bty='l')
legend("topright", c("activated", "resting"), pch=16, col=c("orange", "darkseagreen3"), bty="n")

Integration: Processing and dimensionality reduction of each dataset separately

library(Signac)
library(Seurat)
## 
## Attaching package: 'Seurat'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     Assays
library(patchwork)
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
## 
##     align_plots
library(ArchR)
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
## The following object is masked from 'package:SummarizedExperiment':
## 
##     shift
## The following object is masked from 'package:GenomicRanges':
## 
##     shift
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
## Loading required package: rhdf5
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
set.seed(1234)
addArchRGenome("mm10")
## Setting default genome to Mm10.
# scATAC-seq importing and preprocessing
# from 20200610_scATAC_injury_archr_samples_pairwiseHBC_v3.Rmd
archProj <- loadArchRProject("../data/scATAC/archR_samples_pairwise_v2")
## Successfully loaded ArchRProject!
## 
##                                                    / |
##                                                  /    \
##             .                                  /      |.
##             \\\                              /        |.
##               \\\                          /           `|.
##                 \\\                      /              |.
##                   \                    /                |\
##                   \\#####\           /                  ||
##                 ==###########>      /                   ||
##                  \\##==......\    /                     ||
##             ______ =       =|__ /__                     ||      \\\
##         ,--' ,----`-,__ ___/'  --,-`-===================##========>
##        \               '        ##_______ _____ ,--,__,=##,__   ///
##         ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
##         -,____,---'       \\####\\________________,--\\_##,/
##            ___      .______        ______  __    __  .______      
##           /   \     |   _  \      /      ||  |  |  | |   _  \     
##          /  ^  \    |  |_)  |    |  ,----'|  |__|  | |  |_)  |    
##         /  /_\  \   |      /     |  |     |   __   | |      /     
##        /  _____  \  |  |\  \\___ |  `----.|  |  |  | |  |\  \\___.
##       /__/     \__\ | _| `._____| \______||__|  |__| | _| `._____|
## 
clHBCMerged <- archProj$clHBCMerged
plotEmbedding(ArchRProj = archProj, colorBy = "cellColData", 
              name = "clHBCMerged", embedding = "UMAP_cor")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab2719972642-Date-2021-12-23_Time-18-03-51.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab2719972642-Date-2021-12-23_Time-18-03-51.log

archrUmap <- ArchR::getEmbedding(archProj, embedding = "UMAP_cor")
colnames(archrUmap) <- c("UMAP1", "UMAP2")
plotEmbedding(ArchRProj = archProj, colorBy = "cellColData", 
              name = "treat", embedding = "UMAP_cor")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab271250c42e-Date-2021-12-23_Time-18-03-54.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab271250c42e-Date-2021-12-23_Time-18-03-54.log

clHBCMerged2 <- as.character(getCellColData(archProj, "clHBCMerged")$clHBCMerged)
clHBCMerged2[clHBCMerged2 == "C1_Inj"] <- "Activated"
clHBCMerged2[clHBCMerged2 == "C2_Hybrid"] <- "Hybrid"
clHBCMerged2[clHBCMerged2 == "C3_UI"] <- "Resting"
archrUmap$clHBCMerged2 <- factor(clHBCMerged2)
archrUmap$treatment <- getCellColData(archProj, "treat")$treat
peaks <- archProj@peakSet
names(peaks) <- paste0("peak",1:length(peaks))
atacPeakMat <- readRDS("../data/scATAC/peakMatArchrHbc_v2.rds")
rownames(atacPeakMat) <- names(peaks)
geneActivity <- readRDS("../data/scATAC/geneMatArchrHbc.rds")
atac <- Seurat::CreateSeuratObject(counts = atacPeakMat,
                           assay = "ATAC",
                           project = "10x_ATAC")
atac[["ACTIVITY"]] <- CreateAssayObject(counts = geneActivity)
atac$tech <- "atac"
atac$treatment <- archProj@cellColData$treat
atac$clHBCMerged <- clHBCMerged
# process gene activity
DefaultAssay(atac) <- "ACTIVITY"
atac <- FindVariableFeatures(atac)
atac <- NormalizeData(atac)
atac <- ScaleData(atac)
## Centering and scaling data matrix
# process peak matrix
DefaultAssay(atac) <- "ATAC"
# VariableFeatures(atac) <- names(which(Matrix::rowSums(atac) > 50))
# atac <- RunLSI(atac, n = 30, scale.max = NULL)
# atac <- RunUMAP(atac, reduction = "lsi", dims = 1:30)
atac <- FindTopFeatures(atac, min.cutoff = 'q0')
# Seurat 3
atac <- RunLSI(atac, n = 30) 
## Warning: RunLSI is being moved to Signac. Equivalent functionality can be
## achieved via the Signac::RunTFIDF and Signac::RunSVD functions; for more
## information on Signac, please see https://github.com/timoast/Signac

## Warning: RunLSI is being moved to Signac. Equivalent functionality can be
## achieved via the Signac::RunTFIDF and Signac::RunSVD functions; for more
## information on Signac, please see https://github.com/timoast/Signac

## Warning: RunLSI is being moved to Signac. Equivalent functionality can be
## achieved via the Signac::RunTFIDF and Signac::RunSVD functions; for more
## information on Signac, please see https://github.com/timoast/Signac
## Performing TF-IDF normalization
## Running SVD on TF-IDF matrix
## Scaling cell embeddings
# Seurat 4
#atac <- RunTFIDF(atac)
#atac <- RunSVD(atac, n = 30, reduction.key = "lsi_", reduction.name = "lsi")
# drop first dim as often correlated with seq depth
atac <- RunUMAP(object = atac, reduction = 'lsi', dims = 2:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 18:05:20 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:05:20 Read 4076 rows and found 29 numeric columns
## 18:05:20 Using Annoy for neighbor search, n_neighbors = 30
## 18:05:20 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:05:20 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//RtmpXyXqlR/fileab27747fb166
## 18:05:20 Searching Annoy index using 1 thread, search_k = 3000
## 18:05:22 Annoy recall = 100%
## 18:05:23 Commencing smooth kNN distance calibration using 1 thread
## 18:05:25 Initializing from normalized Laplacian + noise
## 18:05:25 Commencing optimization for 500 epochs, with 152690 positive edges
## 18:05:32 Optimization finished
# scRNA-seq import
rna <- CreateSeuratObject(counts=countHBC)
rna$tech <- '10x'
rna$celltype <- grpHBC
rna <- FindVariableFeatures(rna, selection.method = "vst", nfeatures = 2000)
rna <- ScaleData(rna)
## Centering and scaling data matrix
rna <- RunPCA(rna, npcs=20)
## PC_ 1 
## Positive:  Slc6a6, Rps27, Junb, Aqp3, Btg2, Fos, Fxyd3, Apoe, Krt5, Jun 
##     Gclm, Socs3, Ier2, Gpm6a, Ubc, Ctsl, Cyr61, Ifitm3, Laptm4a, Atf3 
##     Gstm2, Csrp1, Ptn, Pltp, Fosb, Dusp1, Igfbp2, Klf4, Krt14, Gas6 
## Negative:  Tmsb10, Cystm1, Atf5, Plaur, Adam8, Krt6a, Map1b, Tuba1a, Tubb2b, Cst6 
##     Ptma, Ecm1, Stmn1, Emp3, Uchl1, Upk3bl, Ceacam1, Clcf1, Tm4sf1, Gprc5a 
##     Cldn7, Tubb6, Lypd3, Cldn3, Lamc2, Phlda2, Ncam1, Cldn4, Mal, Sult2b1 
## PC_ 2 
## Positive:  Igfbp2, Cald1, Tmsb10, Ogn, Tmsb4x, Igfbp4, Adam8, Krt5, Apoe, Plaur 
##     Fam101b, Tspan6, Abca4, Sparc, Tm4sf1, Col23a1, Lamc2, Krt6a, Clcf1, Tmem108 
##     Slc29a1, Krt14, Tubb6, Ngf, Cst6, Oaf, Tubb2b, Ecm1, Hmga2, Car12 
## Negative:  Prdx6, Adh7, Gsta3, Sftpd, Cyp2f2, Ly6a, Tst, Slc16a11, Ifitm1, Reg3g 
##     Muc1, Por, Cbr2, Adh1, Lrrc26, Selenbp1, Mgst1, Gpx2, Sult1d1, Gstp1 
##     Cbr3, Cd14, Cyp4b1, Cxcl17, Gsto1, Serpinb11, Elf3, Ly6c1, Serpina3n, Epas1 
## PC_ 3 
## Positive:  Adh7, Aldh3a1, Selenbp1, Adh1, Ly6a, Oat, Defb1, Fmo2, Cbr3, Gstp1 
##     Tmem176a, Atp1b1, Tmem176b, Efemp1, Igfbp4, Upk1b, Sparc, Ly6c1, Tgm2, Igfbp2 
##     Mgp, Kcnj16, Sult1d1, Gmpr, Cdo1, Krt15, Nrtn, Cyp4b1, Muc1, Bsg 
## Negative:  Cldn4, X1810011O10Rik, Klf4, Sfn, Zfp36, S100a10, Mast4, Atf3, Tagln2, Fosb 
##     Avpi1, Arc, Nr4a1, Cldn7, Nfat5, Egr1, Klf6, Cdkn1a, Phlda1, Lmna 
##     Crym, Neat1, Anxa1, Krt18, S100a6, Acsm4, Lypd3, Papss2, Ier2, Krt23 
## PC_ 4 
## Positive:  mt.Co1, mt.Co3, mt.Atp6, Hpgd, Pla2r1, X1500009L16Rik, Acsm4, Rps27rt, Car12, Dapl1 
##     Myo6, Fth1, Papss2, Lypd2, Insl6, Cyb5r3, Scd2, Gas6, Trp63, Nmb 
##     Eya1, Piezo2, Limd2, Crym, Sema5a, Serpinb1b, Ftl1, X4631405K08Rik, Fndc1, Scgb1c1 
## Negative:  Actg1, Anxa3, Ier3, Nfkbia, F3, Tmem176b, Krt7, Sfn, Cyr61, Cldn4 
##     Krt14, Tacstd2, Tuba1c, Tmem176a, Fos, Bhlhe40, Junb, Tnfrsf12a, Gmpr, Cish 
##     Fst, Cxcl16, Plaur, Krt17, Ubc, Trib1, Oat, Atf3, Adam8, Dusp5 
## PC_ 5 
## Positive:  Fosb, Egr1, Neat1, Irs2, Pdcd4, Nr4a1, Lmna, Klf9, Cbx3, Dst 
##     Ccdc3, Mast4, Sdc4, Timp3, Nfia, Kdm6b, Birc5, Maff, Spry2, Il6st 
##     Stmn1, Hist1h2ap, Nfat5, Sik1, Arid5a, Gm26532, Pbk, Gclc, Tpm1, Fam101b 
## Negative:  Aldh2, Gstm1, Prdx1, Aqp3, Ces1d, Tst, Gsn, Clu, Ephx1, Cpe 
##     Ifitm3, Ubc, Krt18, Prdx6, Hspb1, Galm, Fmo6, Hspa5, Ppa1, Krt5 
##     Krt8, Pltp, Mgst1, Ctsl, Cyp2a5, Alas1, Cryl1, Calml4, Gstm2, Actg1
rna <- RunUMAP(rna, dims = 1:20, min_dist=.1)
## Warning: The following arguments are not used: min_dist
## 18:05:36 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:05:36 Read 3064 rows and found 20 numeric columns
## 18:05:36 Using Annoy for neighbor search, n_neighbors = 30
## 18:05:36 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:05:37 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//RtmpXyXqlR/fileab276b57db4a
## 18:05:37 Searching Annoy index using 1 thread, search_k = 3000
## 18:05:38 Annoy recall = 100%
## 18:05:39 Commencing smooth kNN distance calibration using 1 thread
## 18:05:41 Initializing from normalized Laplacian + noise
## 18:05:41 Commencing optimization for 500 epochs, with 122146 positive edges
## 18:05:46 Optimization finished
p1 <- DimPlot(atac, reduction = "umap", group.by="treatment", label=TRUE) + 
  NoLegend() + 
  ggtitle("scATAC-seq")
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
p2 <- DimPlot(rna, group.by = "celltype", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scRNA-seq")
p1 + p2

# Markers for activated/resting HBC in scRNA-seq
FeaturePlot(rna, features = "Krtdap")

FeaturePlot(rna, features = "Krt6a")

FeaturePlot(rna, features = "Krt5")

FeaturePlot(rna, features = "Krt14")

FeaturePlot(rna, features = "Trp63")

# Markers for activated/resting HBC in scATAC-seq
FeaturePlot(atac, features = "Krtdap") + xlim(c(-5,7))
## Warning: Could not find Krtdap in the default search locations, found in
## ACTIVITY assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 14 rows containing missing values (geom_point).

FeaturePlot(atac, features = "Krt6a") + xlim(c(-5,7))
## Warning: Could not find Krt6a in the default search locations, found in ACTIVITY
## assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 14 rows containing missing values (geom_point).

FeaturePlot(atac, features = "Krt5") + xlim(c(-6,6))
## Warning: Could not find Krt5 in the default search locations, found in ACTIVITY
## assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 55 rows containing missing values (geom_point).

FeaturePlot(atac, features = "Krt14") + xlim(c(-6,6))
## Warning: Could not find Krt14 in the default search locations, found in ACTIVITY
## assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 55 rows containing missing values (geom_point).

FeaturePlot(atac, features = "Trp63") + xlim(c(-6,6))
## Warning: Could not find Trp63 in the default search locations, found in ACTIVITY
## assay instead
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 55 rows containing missing values (geom_point).

Assign sub-hybrid labels to RNA-seq

ctHybridSub <- as.character(rna$celltype)
hlpid <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>-1)
ctHybridSub[hlpid] <- NA
hlp2id <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>-1 & rna@reductions$umap@cell.embeddings[,2]<4)
ctHybridSub[hlp2id] <- "hybrid1"
hlp1id <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>4)
ctHybridSub[hlp1id] <- "hybrid2"
rna$ctHybridSub <- ctHybridSub


DimPlot(rna, group.by = "ctHybridSub", label = FALSE, repel = TRUE) +
  ggtitle("scRNA-seq") 

table(ctHybridSub)
## ctHybridSub
## activated   hybrid1   hybrid2   resting 
##       602       130       148      2184

Explore respiratory markers

expressionBoxplot <- function(rna, gene){
  counts <- as.vector(rna@assays$RNA[gene,])
  df <- data.frame(counts=log1p(counts),
                   celltype=rna$ctHybridSub)
  ggplot(df, aes(x=celltype, y=counts)) +
    geom_boxplot() +
    xlab("Cell state") +
    ylab("Log(Expression + 1)") +
    theme_bw() +
    theme(axis.text.x = element_text(angle=45, vjust=0.5)) +
    ggtitle(gene)
}
# Reg3g: respiratory marker
FeaturePlot(rna, features = "Reg3g")

expressionBoxplot(rna, "Reg3g")

plotEmbedding(
    ArchRProj = archProj, 
    colorBy = "GeneScoreMatrix", 
    name = "Reg3g", 
    embedding = "UMAP_cor",
    quantCut = c(0.01, 0.95),
    size=1/3,
    plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab273b3f15f1-Date-2021-12-23_Time-18-05-57.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:05:57 :
## 1 2 3 4 5 6 
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab273b3f15f1-Date-2021-12-23_Time-18-05-57.log

# FOXJ1: respiratory ciliated marker
FeaturePlot(rna, features = "Foxj1")

expressionBoxplot(rna, "Foxj1")

plotEmbedding(
    ArchRProj = archProj, 
    colorBy = "GeneScoreMatrix", 
    name = "Foxj1", 
    embedding = "UMAP_cor",
    quantCut = c(0.01, 0.95),
    size=1/3,
    plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab272ab99f9a-Date-2021-12-23_Time-18-06-00.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:06:01 : 
## 1 2 3 4 5 6 
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab272ab99f9a-Date-2021-12-23_Time-18-06-00.log

# Muc5ac: respiratory secretory marker
FeaturePlot(rna, features = "Muc5ac")

expressionBoxplot(rna, "Muc5ac")

plotEmbedding(
    ArchRProj = archProj, 
    colorBy = "GeneScoreMatrix", 
    name = "Muc5ac", 
    embedding = "UMAP_cor",
    quantCut = c(0.01, 0.95),
    size=1/3,
    plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab279d4d60-Date-2021-12-23_Time-18-06-04.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:06:04 : 
## 1 2 3 4 5 6 
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab279d4d60-Date-2021-12-23_Time-18-06-04.log

# Cbr2: respiratory basal cell marker (Boying's analyses)
FeaturePlot(rna, features = "Cbr2")

expressionBoxplot(rna, "Cbr2")

plotEmbedding(
    ArchRProj = archProj, 
    colorBy = "GeneScoreMatrix", 
    name = "Cbr2", 
    embedding = "UMAP_cor",
    quantCut = c(0.01, 0.95),
    size=1/3,
    plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab275742d9f0-Date-2021-12-23_Time-18-06-07.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:06:07 : 
## 1 2 3 4 5 6 
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab275742d9f0-Date-2021-12-23_Time-18-06-07.log

# Sult1c1: respiratory basal cell and respiratory epithelial marker (Boying's analyses)
FeaturePlot(rna, features = "Sult1c1")

expressionBoxplot(rna, "Sult1c1")

plotEmbedding(
    ArchRProj = archProj, 
    colorBy = "GeneScoreMatrix", 
    name = "Sult1c1", 
    embedding = "UMAP_cor",
    quantCut = c(0.01, 0.95),
    size=1/3,
    plotAs="points")
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ab2765f24851-Date-2021-12-23_Time-18-06-10.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-12-23 18:06:10 : 
## 1 2 3 4 5 6 
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ab2765f24851-Date-2021-12-23_Time-18-06-10.log

Explore hybrid1 markers from ATAC in lineage-traced RNA-seq data

hybrid1Markers <- c("Muc5b", "Sult1c1")

FeaturePlot(rna, features = "Muc5b")

expressionBoxplot(rna, "Muc5b")

FeaturePlot(rna, features = "Sult1c1")

expressionBoxplot(rna, "Sult1c1")

Explore hybrid2 markers from ATAC in lineage-traced RNA-seq data

Note that Pax7 seems to fuse with a FOX TF.. https://www.genecards.org/cgi-bin/carddisp.pl?gene=PAX7

hybrid2Markers <- c("Pax3", "Pax7", "Pax9", "Dmrt2", "2610016A17Rik")

FeaturePlot(rna, features = "Pax3")

expressionBoxplot(rna, "Pax3")

FeaturePlot(rna, features = "Pax7")

expressionBoxplot(rna, "Pax7")

FeaturePlot(rna, features = "Pax9")

expressionBoxplot(rna, "Pax9")

FeaturePlot(rna, features = "Dmrt2")

expressionBoxplot(rna, "Dmrt2")

Explore hybrid markers in whole OE RNA-seq data

expressionBoxplot2 <- function(rnaWOE, gene){
  if(!gene %in% rownames(rnaWOE@assays$RNA)){
    return(NULL)
  }
  counts <- as.vector(rnaWOE@assays$RNA[gene,])
  ct <- rnaWOE$celltype
  ct[ct == "Activated HBC"] <- "HBC*"
  ct[ct == "Respiratory basal cell"] <- "Resp BC"
  ct[ct == "Resting HBC"] <- "HBC"
  df <- data.frame(counts=log1p(counts),
                   celltype=ct)
  ggplot(df, aes(x=celltype, y=counts)) +
    geom_boxplot() +
    xlab("Cell state") +
    ylab("Log(Expression + 1)") +
    theme_bw() +
    theme(axis.text.x = element_text(angle=45, vjust=0.5)) +
    ggtitle(gene)
}
hybrid1Top10Markers <- c("Epas1", "Ptgfr", "Kcnj1", "Tmc5", "Pla2g2c", "Muc5b",
                       "B3galt1", "Tacr1", "Sult1c1", "Arhgef38")
hybrid2Top10Markers <- c("Gm32865", "2610016A17Rik", "Pax9", "Pax3", "Atp10a",
                         "Kcnt2", "Aldh1a7", "Aldh1a1", "Cfh", "Cap2", "Pcare")

# scRNA-seq import of whole OE data
# Import whole OE data
sceWOE <- readRDS("../data/wholeOE/sceWOE.rds")
# scRNA-seq import of lineage-traced data
rnaWOE <- CreateSeuratObject(counts=as.matrix(assays(sceWOE)$counts))
rnaWOE$tech <- '10x'
rnaWOE$celltype <- colData(sceWOE)$seurat_annotated
rnaWOE <- FindVariableFeatures(rnaWOE, selection.method = "vst", nfeatures = 2000)
rnaWOE <- ScaleData(rnaWOE)
## Centering and scaling data matrix
rnaWOE <- RunPCA(rnaWOE, npcs=20)
## PC_ 1 
## Positive:  Hpgd, Aox2, Abca4, Nrcam, Car12, Azgp1, Abi3bp, Kcnj13, Gpr155, Apoc1 
##     Dcn, Gpha2, Ccdc153, 1500009L16Rik, Timp3, Sostdc1, Insl6, Itpkb, Irf1, 1200007C13Rik 
##     Cldn11, Hsd17b6, Defb1, Acsm4, Adh1, H2-Eb1, Cd36, Hist2h4, Fmo3, Gal3st2c 
## Negative:  Ran, Hsp90ab1, Ranbp1, Eif5a, Hnrnpf, Npm1, Pfn1, Serbp1, Set, Nme1 
##     Eif4a1, Ybx1, Anp32b, Rps2, Nhp2, Hspd1, Cct2, Ncl, Snrpd1, Ube2m 
##     Cct8, Prmt1, Hspe1, Hnrnpab, Ppp1r14b, Tubb5, Cct3, Rpsa, Cct5, Pa2g4 
## PC_ 2 
## Positive:  Igfbp2, Nrcam, Basp1, Tinagl1, Car12, Hpgd, Krt5, Krt14, Abca4, Axl 
##     G0s2, Ogn, Slc1a3, Thbs1, Ass1, Cald1, Fgfbp1, Col18a1, Krt17, Nrg1 
##     Igfbp4, Fscn1, Itga3, Azgp1, Bin1, 1500009L16Rik, Palld, Pxdc1, Ddah1, Tubb6 
## Negative:  Tspan1, Serpinb11, Bpifa1, Muc4, Reg3g, Fetub, Pon1, Tst, Gabrp, Tspan13 
##     Lypd2, Slc16a11, Lrrc26, Tmc5, Slc31a1, Cmtm8, Clic6, Slc44a4, Cyp2j6, Mlph 
##     Cxcl17, Xbp1, Sec11c, Myo5c, Fam174b, 5330417C22Rik, Muc16, Vtcn1, Pdzk1ip1, Tff2 
## PC_ 3 
## Positive:  Stmn1, Spc24, Tk1, Cenph, Pbk, Clspn, Smc2, Cenpw, Cenpm, Asf1b 
##     Smc4, Ndc80, Ccdc34, Pclaf, Atad2, Hmgb2, Tacc3, Cenpk, Tyms, Cdk1 
##     Anln, Birc5, Rad51, Rrm2, H2afz, Aurkb, Rrm1, Melk, Rad51ap1, Bub1 
## Negative:  Cldn4, Pttg1ip, Plaur, Adam8, Lamc2, Pls3, Fst, Rbms1, St14, Ngf 
##     Cdh1, Capn2, Ifi202b, Plin2, Irf6, Zfp593, Nabp1, Palld, Rtn4, Tnfrsf12a 
##     Msn, Sfn, Rnf19b, Hbegf, Clcf1, Tubb6, Fosl1, Prrg4, Taldo1, Glrx 
## PC_ 4 
## Positive:  Tgm2, Adh7, 4833423E24Rik, Gpx2, Abi3bp, Gsto1, Ly6a, Adh1, Ly6d, Anxa8 
##     Serpinb5, Mgp, Defb1, Cd14, Cfh, Fmo3, Pax9, Pir, Gm20186, Cdc14a 
##     Cdh13, Slc39a4, Col25a1, Capg, Pcolce, Sat1, Gdpd2, Rbp1, Irx3, Gda 
## Negative:  Nrcam, Basp1, Aox2, Car12, Hpgd, Azgp1, Igfbp2, Ermn, 1500009L16Rik, Scgb1c1 
##     Mdk, Abca4, G0s2, Insl6, Nt5dc2, Calml4, Galm, Pdlim3, Tmem108, Ogn 
##     Plscr2, Nectin3, Hsd17b6, Kcnj13, Sostdc1, Gal3st2c, Pcdh17, Crabp2, Slc1a3, Mef2c 
## PC_ 5 
## Positive:  Anxa1, Lmo7, Nectin2, Cldn3, Ctsh, St14, Tubb2b, Cldn7, Rab11fip1, Ceacam1 
##     Crabp2, Cryab, Espn, Cldn6, Mal, Cdh1, Cxcl16, Blnk, Elf5, Cldn9 
##     Hebp2, Cldn4, Crb3, Tjp2, Gm14137, Akap12, Ndrg1, Elf3, Tacstd2, Metrnl 
## Negative:  Htra1, Nrg1, Ung, Slc16a1, Bpifa1, Glyat, Srm, Ano1, Mcm7, Sssca1 
##     Npm3, Tff2, Dctpp1, Pold2, Psmc3ip, Cotl1, Lypd2, Bpifb1, Aldh1a1, Pdzrn4 
##     Tipin, Axl, Ccbe1, Cdca7, Tcn2, Pon1, Reg3g, Nes, Sigmar1, Muc16
rnaWOE <- RunUMAP(rnaWOE, dims = 1:20, min_dist=.1)
## Warning: The following arguments are not used: min_dist
## 18:06:28 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:06:28 Read 2954 rows and found 20 numeric columns
## 18:06:28 Using Annoy for neighbor search, n_neighbors = 30
## 18:06:28 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:06:28 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//RtmpXyXqlR/fileab2788bef75
## 18:06:28 Searching Annoy index using 1 thread, search_k = 3000
## 18:06:29 Annoy recall = 100%
## 18:06:30 Commencing smooth kNN distance calibration using 1 thread
## 18:06:32 Initializing from normalized Laplacian + noise
## 18:06:32 Commencing optimization for 500 epochs, with 123922 positive edges
## 18:06:38 Optimization finished
pp1 <- pp2 <- list()
for(gg in 1:9){
  pp1[[gg]] <- expressionBoxplot2(rnaWOE, gene=hybrid1Top10Markers[gg])
  pp2[[gg]] <- expressionBoxplot2(rnaWOE, gene=hybrid2Top10Markers[gg])
}
cowplot::plot_grid(plotlist=pp1)

cowplot::plot_grid(plotlist=pp2)

# Would the subset of respiratory basal cells that express Krt5, Muc5b and Sult1c1 actually be hybrid cells? It looks like it based on markers...
p1 <- FeaturePlot(rnaWOE, hybrid1Top10Markers[1:9], combine = FALSE)
p1Norm <- FeaturePlot(NormalizeData(rnaWOE), hybrid1Top10Markers[1:9], combine = FALSE)
for(i in 1:length(p1)) {
  p1[[i]] <- p1[[i]] + NoLegend()
  p1Norm[[i]] <- p1Norm[[i]] + NoLegend()
}
cowplot::plot_grid(plotlist = p1)

cowplot::plot_grid(plotlist = p1Norm)

p2 <- FeaturePlot(rnaWOE, hybrid2Top10Markers[1:9], combine = FALSE)
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: Gm32865
p2Norm <- FeaturePlot(NormalizeData(rnaWOE), hybrid2Top10Markers[1:9], combine = FALSE)
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: Gm32865
for(i in 1:length(p2)) {
  p2[[i]] <- p1[[i]] + NoLegend()
  p2Norm[[i]] <- p1Norm[[i]] + NoLegend()
}
cowplot::plot_grid(plotlist = p2)

cowplot::plot_grid(plotlist = p2Norm)